#This is the script we have used in our paper in RAC.

#We developed it for R Studio software. Hence, to run it
#successfully, you will need to have installed R and R Studio on your computer.

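#After doing it, you must import the "Consumer.xlsx" dataset into R Studio. 
#To do it, open R Studio, go to its upper menu and follow this sequence: 
#File/Import Dataset/From Excel. The imported object must be named "Consumer",
#because that is the name this script refers to.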

#Now, you are ready to run this script. To help you to understand clearly each
#procedure we adopted, we divided our script into 17 steps. 

#Step 1: installing the packages into R Studio.

#The R language requires users to install packages to run specific techniques 
#such as cluster analysis. Therefore, you are supposed to do it before
#running this script. The command to install the packages you will need 
#in this script is:

#install.packages(c("factoextra", "fpc", "dbscan", "cluster"))
#(The "stats" package is part of R itself, so it does not need to be installed.)

#Please be aware that the package installation is a "once in a lifetime" procedure:
#you do not need to use this function every time you want to run the script.
#Because of that, we have written this function with the "#" symbol to prevent it
#from running again. If you need to use it, remove that symbol before running the script.

#Step 2: loading the installed packages

#You need to load the packages you have just installed into R Studio. 
#To do it, run the following commands:

library(stats)
library(factoextra)
library(fpc)
library(dbscan)
library(cluster)

#Step 3: omitting the missing data

#Handling missing data is beyond the scope of our paper, so we simply discard
#every row of the imported "Consumer" dataset that contains at least one missing
#value. The "na.omit()" function does that; the result is stored in the object
#"ConsumerNoMissing".

ConsumerNoMissing <- stats::na.omit(object = Consumer)

#Step 4: standardizing the quantitative variables

#The cluster techniques we have used in our paper demand that all the quantitative
#variables be on the same scale. In our dataset those variables are in columns
#2 to 51, which is why only those columns are selected below. The function "scale()"
#centers each selected column and divides it by its standard deviation; the output is
#stored in the new object "ConsumerScale".

ConsumerScale <- scale(
  x = ConsumerNoMissing[2:51],
  center = TRUE,
  scale = TRUE
)

#Step 5: creating a sample.

#Our original dataset is too large to ensure a good visualization of the plots. Hence,
#we worked on a sample with n = 108, which comprises about 5% of the original dataset.
#The sample is made of the first 108 rows of "ConsumerScale" (that is what "head()"
#returns). This size allowed the plots to be visualized satisfactorily. We stored the
#extracted data in the object "ConsumerScaleSample" and printed it for inspection.

ConsumerScaleSample <- utils::head(x = ConsumerScale, n = 108L)
print(x = ConsumerScaleSample)

#Step 6: identifying the optimal number of clusters in K-Means technique
#by the elbow and the silhouette methods

#We have explained those methods in our paper. The syntax of the following commands is 
#necessary to create the plots.

#We added command lines before and after the "fviz_nbclust" function. Those lines allow
#readers to save the figures in high definition in their devices. We used the .tiff
#format because it provides a good balance between the file size and the image quality.

#The plots are wrapped in "print()" because ggplot objects are only drawn automatically
#when a command is typed at the console. Without "print()", the figures would not be
#drawn into the files when this script is run through "source()".

#K-Means starts from randomly chosen centers. We fix the random seed so that the
#curves are the same every time the script is run.

set.seed(123)

tiff("Figure1.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_nbclust(ConsumerScaleSample, kmeans, method = "wss") +
    geom_vline(xintercept = 2, linetype = 2) +
    labs(subtitle = "Elbow method")
)
dev.off()

tiff("Figure2.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_nbclust(ConsumerScaleSample, kmeans, method = "silhouette") +
    labs(subtitle = "Average silhouette method")
)
dev.off()

#Step 7: running K-Means clustering with K = 2 

#"kmeans()" tries 30 random sets of starting centers (nstart = 30) and keeps the best
#solution. We fix the random seed first so that the cluster assignments (and the
#numbering of the clusters) are the same every time the script is run.

set.seed(123)
km.Consumer <- kmeans(ConsumerScaleSample, centers = 2, nstart = 30)

#Step 8: plotting the output from K-Means clustering with K = 2 

#The figure is saved in the file "Figure3.tiff". The plot is wrapped in "print()"
#because ggplot objects are only drawn automatically when a command is typed at the
#console; without it, the figure would not be drawn when this script is run
#through "source()".

tiff("Figure3.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_cluster(km.Consumer, data = ConsumerScaleSample, palette = "jco",
               geom = "point", ellipse.type = "norm",
               repel = TRUE, ggtheme = theme_minimal())
)
dev.off()

#Step 9: estimating the optimal number of clusters in the PAM.

#We have changed the K-Means clusterization to incorporate the PAM algorithm. Therefore, we had
#to re-estimate the optimal number of clusters with the elbow and the silhouette methods.

#The figures are saved in the files "Figure4.tiff" and "Figure5.tiff". The plots are
#wrapped in "print()" because ggplot objects are only drawn automatically when a
#command is typed at the console; without it, the figures would not be drawn when
#this script is run through "source()".

tiff("Figure4.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_nbclust(ConsumerScaleSample, pam, method = "wss") +
    geom_vline(xintercept = 2, linetype = 2) +
    labs(subtitle = "Elbow method")
)
dev.off()

tiff("Figure5.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_nbclust(ConsumerScaleSample, pam, method = "silhouette") +
    labs(subtitle = "Average silhouette method")
)
dev.off()

#Step 10: running the PAM clustering with K = 2

#The "pam()" function (Partitioning Around Medoids, from the "cluster" package)
#splits the sample into 2 clusters. The output is kept in the object "pam.Consumer".

pam.Consumer <- cluster::pam(x = ConsumerScaleSample, k = 2)

#Step 11: plotting the output

#We plotted the output by using the "fviz_cluster()" function and saved it in the
#file "Figure6.tiff". The plot is wrapped in "print()" because ggplot objects are only
#drawn automatically when a command is typed at the console; without it, the figure
#would not be drawn when this script is run through "source()".

tiff("Figure6.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_cluster(pam.Consumer, data = ConsumerScaleSample, palette = "jco",
               geom = "point", ellipse.type = "norm",
               repel = TRUE, ggtheme = theme_minimal())
)
dev.off()

#Step 12: initiating the DBSCAN

#Before running the DBSCAN we had to choose the eps parameter. To do it, we plotted
#the k-nearest-neighbor distances of the sample for K = 2 and drew a dashed
#horizontal line at the chosen value, 2.87. The figure is saved in "Figure7.tiff".

grDevices::tiff(
  filename = "Figure7.tiff",
  width = 2800,
  height = 2000,
  pointsize = 10,
  res = 600,
  family = "Times New Roman"
)
dbscan::kNNdistplot(x = ConsumerScaleSample, k = 2)
graphics::abline(h = 2.87, lty = "dashed")
grDevices::dev.off()

#Step 13: running DBSCAN with K = 2 and eps = 2.87

#DBSCAN is run on the sample with eps = 2.87 (the value marked in the plot of the
#previous step) and MinPts = 3. The output is kept in the object "db.Consumer".
#NOTE(review): MinPts = 3 presumably corresponds to K + 1, with K = 2 as used in
#the distance plot - confirm against the paper.

db.Consumer <- fpc::dbscan(
  data = ConsumerScaleSample,
  eps = 2.87,
  MinPts = 3
)

#Step 14: plotting the output from DBSCAN

#We have plotted the output with the function "fviz_cluster()" and saved it in the
#file "Figure8.tiff". The plot is wrapped in "print()" because ggplot objects are only
#drawn automatically when a command is typed at the console; without it, the figure
#would not be drawn when this script is run through "source()".

tiff("Figure8.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_cluster(db.Consumer, data = ConsumerScaleSample, stand = FALSE,
               ellipse.type = "norm", show.clust.cent = FALSE,
               geom = "point", palette = "jco", ggtheme = theme_classic())
)
dev.off()

#Step 15: identifying the outliers in DBSCAN

#We have used the following command to identify the outliers after running the DBSCAN.
#It shows the cluster assigned to each observation of the sample.
#The noise or outlier observations are coded as 0.

#We call "print()" explicitly so that the vector is also displayed when this script
#is run through "source()", where bare expressions are not shown.

print(db.Consumer$cluster)

#Step 16: running the FCM clustering with K = 2.

#The last clustering technique (FCM) is run with the "fanny()" function from the
#"cluster" package, asking for 2 clusters. The output is kept in the object
#"fuz.Consumer" and then displayed with the function "print()".

fuz.Consumer <- cluster::fanny(x = ConsumerScaleSample, k = 2)
print(x = fuz.Consumer)

#Step 17: plotting the output

#We have plotted the output from FCM by using the "fviz_cluster()" function and saved
#it in the file "Figure9.tiff". The plot is wrapped in "print()" because ggplot objects
#are only drawn automatically when a command is typed at the console; without it, the
#figure would not be drawn when this script is run through "source()".

tiff("Figure9.tiff", family = "Times New Roman", pointsize=10, width=2800, height=2000, res=600)
print(
  fviz_cluster(fuz.Consumer, ellipse.type = "norm", repel = TRUE, palette = "jco",
               geom = "point", ggtheme = theme_minimal(), legend = "right")
)
dev.off()
